commit 5725527e969ef5d52759e35d253f6ca9b05741d4
Author: Yusuke Suzuki <ysuzuki@apple.com>
Date:   Thu Mar 2 03:42:51 2023 -0800

    [JSC] Remove m_dataScratch register in WasmBBQJIT
    https://bugs.webkit.org/show_bug.cgi?id=253230
    rdar://106129794
    
    Reviewed by Mark Lam.
    
    Using m_dataScratch caused *so* many problems. It is rax in x64, which causes the
    problem that returnValueGPR is not usable as a scratch. And our calling convention
    needs a hack to avoid this problem.
    
    Also, we do not need m_dataScratch much. MacroAssembler itself has a scratch register
    for normal use (non B3 / Air use), so that we can implement a lot of operations in
    MacroAssembler without using m_dataScratch.
    
    * Source/JavaScriptCore/assembler/MacroAssembler.h:
    (JSC::MacroAssembler::moveFloat):
    (JSC::MacroAssembler::moveDouble):
    * Source/JavaScriptCore/assembler/MacroAssemblerARM64.h:
    (JSC::MacroAssemblerARM64::transfer32):
    (JSC::MacroAssemblerARM64::materializeVector):
    * Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h:
    (JSC::MacroAssemblerX86_64::transfer32):
    (JSC::MacroAssemblerX86_64::materializeVector):
    * Source/JavaScriptCore/wasm/WasmBBQJIT.cpp:
    (JSC::Wasm::BBQJIT::BBQJIT):
    (JSC::Wasm::BBQJIT::addF32ConvertUI32):
    (JSC::Wasm::BBQJIT::addF32ConvertUI64):
    (JSC::Wasm::BBQJIT::addF64ConvertUI32):
    (JSC::Wasm::BBQJIT::addF64ConvertUI64):
    (JSC::Wasm::BBQJIT::addF32Copysign):
    (JSC::Wasm::BBQJIT::addF64Copysign):
    (JSC::Wasm::BBQJIT::addF32Abs):
    (JSC::Wasm::BBQJIT::addF64Abs):
    (JSC::Wasm::BBQJIT::restoreWebAssemblyGlobalState):
    (JSC::Wasm::BBQJIT::emitIndirectCall):
    (JSC::Wasm::BBQJIT::materializeVectorConstant):
    (JSC::Wasm::BBQJIT::addSIMDV_V):
    (JSC::Wasm::BBQJIT::emitVectorMul):
    (JSC::Wasm::BBQJIT::addSIMDV_VV):
    (JSC::Wasm::BBQJIT::isScratch):
    (JSC::Wasm::BBQJIT::emitMoveConst):
    (JSC::Wasm::BBQJIT::emitMoveMemory):
    
    Canonical link: https://commits.webkit.org/261060@main

Index: webkit2gtk-2.39.91/Source/JavaScriptCore/assembler/MacroAssembler.h
===================================================================
--- webkit2gtk-2.39.91.orig/Source/JavaScriptCore/assembler/MacroAssembler.h
+++ webkit2gtk-2.39.91/Source/JavaScriptCore/assembler/MacroAssembler.h
@@ -1606,12 +1606,6 @@
             move(imm.asTrustedImm64(), dest);
     }
 
-    void moveDouble(Imm64 imm, FPRegisterID dest)
-    {
-        move(imm, scratchRegister());
-        move64ToDouble(scratchRegister(), dest);
-    }
-
     void and64(Imm32 imm, RegisterID dest)
     {
         if (shouldBlind(imm)) {
@@ -1624,6 +1618,34 @@
 
 #endif // USE(JSVALUE64)
 
+#if CPU(X86_64)
+    void moveFloat(Imm32 imm, FPRegisterID dest)
+    {
+        move(imm, scratchRegister());
+        move32ToFloat(scratchRegister(), dest);
+    }
+
+    void moveDouble(Imm64 imm, FPRegisterID dest)
+    {
+        move(imm, scratchRegister());
+        move64ToDouble(scratchRegister(), dest);
+    }
+#endif
+
+#if CPU(ARM64)
+    void moveFloat(Imm32 imm, FPRegisterID dest)
+    {
+        move(imm, getCachedMemoryTempRegisterIDAndInvalidate());
+        move32ToFloat(getCachedMemoryTempRegisterIDAndInvalidate(), dest);
+    }
+
+    void moveDouble(Imm64 imm, FPRegisterID dest)
+    {
+        move(imm, getCachedMemoryTempRegisterIDAndInvalidate());
+        move64ToDouble(getCachedMemoryTempRegisterIDAndInvalidate(), dest);
+    }
+#endif
+
 #if !CPU(X86) && !CPU(X86_64) && !CPU(ARM64)
     // We should implement this the right way eventually, but for now, it's fine because it arises so
     // infrequently.
Index: webkit2gtk-2.39.91/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h
===================================================================
--- webkit2gtk-2.39.91.orig/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h
+++ webkit2gtk-2.39.91/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h
@@ -2026,6 +2026,12 @@
         store64(dataTempRegister, address);
     }
 
+    void transfer32(Address src, Address dest)
+    {
+        load32(src, getCachedDataTempRegisterIDAndInvalidate());
+        store32(getCachedDataTempRegisterIDAndInvalidate(), dest);
+    }
+
     void transfer64(Address src, Address dest)
     {
         load64(src, getCachedDataTempRegisterIDAndInvalidate());
@@ -2638,6 +2644,14 @@
         m_assembler.vorr<128>(dest, src, src);
     }
 
+    void materializeVector(v128_t value, FPRegisterID dest)
+    {
+        move(TrustedImm64(value.u64x2[0]), scratchRegister());
+        vectorReplaceLaneInt64(TrustedImm32(0), scratchRegister(), dest);
+        move(TrustedImm64(value.u64x2[1]), scratchRegister());
+        vectorReplaceLaneInt64(TrustedImm32(1), scratchRegister(), dest);
+    }
+
     void moveZeroToDouble(FPRegisterID reg)
     {
         m_assembler.fmov<64>(reg, ARM64Registers::zr);
@@ -2663,11 +2677,23 @@
         m_assembler.fmov<64>(dest, src);
     }
 
+    void move64ToDouble(TrustedImm64 imm, FPRegisterID dest)
+    {
+        move(imm, getCachedDataTempRegisterIDAndInvalidate());
+        m_assembler.fmov<64>(dest, dataTempRegister);
+    }
+
     void move32ToFloat(RegisterID src, FPRegisterID dest)
     {
         m_assembler.fmov<32>(dest, src);
     }
 
+    void move32ToFloat(TrustedImm32 imm, FPRegisterID dest)
+    {
+        move(imm, getCachedDataTempRegisterIDAndInvalidate());
+        m_assembler.fmov<32>(dest, dataTempRegister);
+    }
+
     void moveConditionallyDouble(DoubleCondition cond, FPRegisterID left, FPRegisterID right, RegisterID src, RegisterID dest)
     {
         m_assembler.fcmp<64>(left, right);
Index: webkit2gtk-2.39.91/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h
===================================================================
--- webkit2gtk-2.39.91.orig/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h
+++ webkit2gtk-2.39.91/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h
@@ -1188,6 +1188,12 @@
         store64(src2, Address(dest, offset.m_value + 8));
     }
 
+    void transfer32(Address src, Address dest)
+    {
+        load32(src, scratchRegister());
+        store32(scratchRegister(), dest);
+    }
+
     void transfer64(Address src, Address dest)
     {
         load64(src, scratchRegister());
@@ -1224,6 +1230,15 @@
             m_assembler.movd_rr(src, dest);
     }
 
+    void move32ToFloat(TrustedImm32 imm, FPRegisterID dest)
+    {
+        move(imm, scratchRegister());
+        if (supportsAVX())
+            m_assembler.vmovd_rr(scratchRegister(), dest);
+        else
+            m_assembler.movd_rr(scratchRegister(), dest);
+    }
+
     void move64ToDouble(RegisterID src, FPRegisterID dest)
     {
         if (supportsAVX())
@@ -1232,6 +1247,15 @@
             m_assembler.movq_rr(src, dest);
     }
 
+    void move64ToDouble(TrustedImm64 imm, FPRegisterID dest)
+    {
+        move(imm, scratchRegister());
+        if (supportsAVX())
+            m_assembler.vmovq_rr(scratchRegister(), dest);
+        else
+            m_assembler.movq_rr(scratchRegister(), dest);
+    }
+
     void moveDoubleTo64(FPRegisterID src, RegisterID dest)
     {
         if (supportsAVX())
@@ -1247,7 +1271,15 @@
         else
             m_assembler.movaps_rr(src, dest);
     }
-    
+
+    void materializeVector(v128_t value, FPRegisterID dest)
+    {
+        move(TrustedImm64(value.u64x2[0]), scratchRegister());
+        vectorReplaceLaneInt64(TrustedImm32(0), scratchRegister(), dest);
+        move(TrustedImm64(value.u64x2[1]), scratchRegister());
+        vectorReplaceLaneInt64(TrustedImm32(1), scratchRegister(), dest);
+    }
+
     void loadVector(TrustedImmPtr address, FPRegisterID dest)
     {
         move(address, scratchRegister());
Index: webkit2gtk-2.39.91/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp
===================================================================
--- webkit2gtk-2.39.91.orig/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp
+++ webkit2gtk-2.39.91/Source/JavaScriptCore/wasm/WasmBBQJIT.cpp
@@ -1164,7 +1164,6 @@
         RegisterSetBuilder callerSaveFprs = fprSetBuilder;
 
         gprSetBuilder.remove(m_scratchGPR);
-        gprSetBuilder.remove(m_dataScratchGPR);
         fprSetBuilder.remove(m_scratchFPR);
 
         m_gprSet = m_validGPRs = gprSetBuilder.buildAndValidate();
@@ -5248,10 +5247,12 @@
             "F32ConvertUI32", TypeKind::F32,
             BLOCK(Value::fromF32(static_cast<uint32_t>(operand.asI32()))),
             BLOCK(
-                m_jit.zeroExtend32ToWord(operandLocation.asGPR(), m_scratchGPR);
 #if CPU(X86_64)
-                m_jit.convertUInt64ToFloat(m_scratchGPR, resultLocation.asFPR(), m_dataScratchGPR);
+                ScratchScope<1, 0> scratches(*this);
+                m_jit.zeroExtend32ToWord(operandLocation.asGPR(), m_scratchGPR);
+                m_jit.convertUInt64ToFloat(m_scratchGPR, resultLocation.asFPR(), scratches.gpr(0));
 #else
+                m_jit.zeroExtend32ToWord(operandLocation.asGPR(), m_scratchGPR);
                 m_jit.convertUInt64ToFloat(m_scratchGPR, resultLocation.asFPR());
 #endif
             )
@@ -5276,7 +5277,7 @@
             BLOCK(Value::fromF32(static_cast<uint64_t>(operand.asI64()))),
             BLOCK(
 #if CPU(X86_64)
-                m_jit.convertUInt64ToFloat(operandLocation.asGPR(), resultLocation.asFPR(), m_dataScratchGPR);
+                m_jit.convertUInt64ToFloat(operandLocation.asGPR(), resultLocation.asFPR(), m_scratchGPR);
 #else
                 m_jit.convertUInt64ToFloat(operandLocation.asGPR(), resultLocation.asFPR());
 #endif
@@ -5301,10 +5302,12 @@
             "F64ConvertUI32", TypeKind::F64,
             BLOCK(Value::fromF64(static_cast<uint32_t>(operand.asI32()))),
             BLOCK(
-                m_jit.zeroExtend32ToWord(operandLocation.asGPR(), m_scratchGPR);
 #if CPU(X86_64)
-                m_jit.convertUInt64ToDouble(m_scratchGPR, resultLocation.asFPR(), m_dataScratchGPR);
+                ScratchScope<1, 0> scratches(*this);
+                m_jit.zeroExtend32ToWord(operandLocation.asGPR(), m_scratchGPR);
+                m_jit.convertUInt64ToDouble(m_scratchGPR, resultLocation.asFPR(), scratches.gpr(0));
 #else
+                m_jit.zeroExtend32ToWord(operandLocation.asGPR(), m_scratchGPR);
                 m_jit.convertUInt64ToDouble(m_scratchGPR, resultLocation.asFPR());
 #endif
             )
@@ -5329,7 +5332,7 @@
             BLOCK(Value::fromF64(static_cast<uint64_t>(operand.asI64()))),
             BLOCK(
 #if CPU(X86_64)
-                m_jit.convertUInt64ToDouble(operandLocation.asGPR(), resultLocation.asFPR(), m_dataScratchGPR);
+                m_jit.convertUInt64ToDouble(operandLocation.asGPR(), resultLocation.asFPR(), m_scratchGPR);
 #else
                 m_jit.convertUInt64ToDouble(operandLocation.asGPR(), resultLocation.asFPR());
 #endif
@@ -5350,8 +5353,7 @@
                 m_jit.and32(Imm32(static_cast<int32_t>(0x80000000u)), m_scratchGPR, m_scratchGPR);
                 m_jit.move32ToFloat(m_scratchGPR, m_scratchFPR);
 #if CPU(X86_64)
-                m_jit.move(Imm32(0x7fffffff), m_scratchGPR);
-                m_jit.move32ToFloat(m_scratchGPR, resultLocation.asFPR());
+                m_jit.move32ToFloat(TrustedImm32(0x7fffffff), resultLocation.asFPR());
                 m_jit.andFloat(lhsLocation.asFPR(), resultLocation.asFPR());
                 m_jit.orFloat(resultLocation.asFPR(), m_scratchFPR, resultLocation.asFPR());
 #else
@@ -5370,8 +5372,7 @@
                 } else {
                     bool signBit = bitwise_cast<uint32_t>(rhs.asF32()) & 0x80000000u;
 #if CPU(X86_64)
-                    m_jit.move(Imm32(0x7fffffff), m_scratchGPR);
-                    m_jit.move32ToFloat(m_scratchGPR, resultLocation.asFPR());
+                    m_jit.move32ToFloat(TrustedImm32(0x7fffffff), resultLocation.asFPR());
                     m_jit.andFloat(lhsLocation.asFPR(), resultLocation.asFPR());
                     if (signBit) {
                         m_jit.xorFloat(m_scratchFPR, m_scratchFPR);
@@ -5408,8 +5409,7 @@
                 m_jit.move64ToDouble(m_scratchGPR, m_scratchFPR);
 
 #if CPU(X86_64)
-                m_jit.move(TrustedImm64(0x7fffffffffffffffll), m_scratchGPR);
-                m_jit.move64ToDouble(m_scratchGPR, resultLocation.asFPR());
+                m_jit.move64ToDouble(TrustedImm64(0x7fffffffffffffffll), resultLocation.asFPR());
                 m_jit.andDouble(lhsLocation.asFPR(), resultLocation.asFPR());
                 m_jit.orDouble(resultLocation.asFPR(), m_scratchFPR, resultLocation.asFPR());
 #else
@@ -5430,8 +5430,7 @@
                 } else {
                     bool signBit = bitwise_cast<uint64_t>(rhs.asF64()) & 0x8000000000000000ull;
 #if CPU(X86_64)
-                    m_jit.move(TrustedImm64(0x7fffffffffffffffll), m_scratchGPR);
-                    m_jit.move64ToDouble(m_scratchGPR, resultLocation.asFPR());
+                    m_jit.move64ToDouble(TrustedImm64(0x7fffffffffffffffll), resultLocation.asFPR());
                     m_jit.andDouble(lhsLocation.asFPR(), resultLocation.asFPR());
                     if (signBit) {
                         m_jit.xorDouble(m_scratchFPR, m_scratchFPR);
@@ -5498,8 +5497,7 @@
             BLOCK(Value::fromF32(std::abs(operand.asF32()))),
             BLOCK(
 #if CPU(X86_64)
-                m_jit.move(Imm32(0x7fffffffll), m_scratchGPR);
-                m_jit.move32ToFloat(m_scratchGPR, m_scratchFPR);
+                m_jit.move32ToFloat(TrustedImm32(0x7fffffffll), m_scratchFPR);
                 m_jit.andFloat(operandLocation.asFPR(), m_scratchFPR, resultLocation.asFPR());
 #else
                 m_jit.absFloat(operandLocation.asFPR(), resultLocation.asFPR());
@@ -5515,8 +5513,7 @@
             BLOCK(Value::fromF64(std::abs(operand.asF64()))),
             BLOCK(
 #if CPU(X86_64)
-                m_jit.move(TrustedImm64(0x7fffffffffffffffll), m_scratchGPR);
-                m_jit.move64ToDouble(m_scratchGPR, m_scratchFPR);
+                m_jit.move64ToDouble(TrustedImm64(0x7fffffffffffffffll), m_scratchFPR);
                 m_jit.andDouble(operandLocation.asFPR(), m_scratchFPR, resultLocation.asFPR());
 #else
                 m_jit.absDouble(operandLocation.asFPR(), resultLocation.asFPR());
@@ -6142,7 +6139,7 @@
         // FIXME: We should just store these registers on stack and load them.
         if (!!m_info.memory) {
             m_jit.loadPairPtr(GPRInfo::wasmContextInstancePointer, TrustedImm32(Instance::offsetOfCachedMemory()), GPRInfo::wasmBaseMemoryPointer, GPRInfo::wasmBoundsCheckingSizeRegister);
-            m_jit.cageConditionallyAndUntag(Gigacage::Primitive, GPRInfo::wasmBaseMemoryPointer, GPRInfo::wasmBoundsCheckingSizeRegister, m_dataScratchGPR, /* validateAuth */ true, /* mayBeNull */ false);
+            m_jit.cageConditionallyAndUntag(Gigacage::Primitive, GPRInfo::wasmBaseMemoryPointer, GPRInfo::wasmBoundsCheckingSizeRegister, m_scratchGPR, /* validateAuth */ true, /* mayBeNull */ false);
         }
     }
 
@@ -6298,11 +6295,8 @@
         case TypeKind::Array:
         case TypeKind::Struct:
         case TypeKind::Func: {
-            resultLocation = Location::fromGPR(GPRInfo::argumentGPR0);
-            if constexpr (GPRInfo::argumentGPR0 != GPRInfo::returnValueGPR) {
-                ASSERT(m_dataScratchGPR == GPRInfo::returnValueGPR);
-                m_jit.move(GPRInfo::returnValueGPR, GPRInfo::argumentGPR0);
-            }
+            resultLocation = Location::fromGPR(GPRInfo::returnValueGPR);
+            ASSERT(m_validGPRs.contains(GPRInfo::returnValueGPR, IgnoreVectors));
             break;
         }
         case TypeKind::F32:
@@ -6382,7 +6376,7 @@
         Jump isSameInstance = m_jit.branchPtr(RelationalCondition::Equal, calleeInstance, GPRInfo::wasmContextInstancePointer);
         m_jit.move(calleeInstance, GPRInfo::wasmContextInstancePointer);
         m_jit.loadPairPtr(GPRInfo::wasmContextInstancePointer, TrustedImm32(Instance::offsetOfCachedMemory()), GPRInfo::wasmBaseMemoryPointer, GPRInfo::wasmBoundsCheckingSizeRegister);
-        m_jit.cageConditionallyAndUntag(Gigacage::Primitive, GPRInfo::wasmBaseMemoryPointer, GPRInfo::wasmBoundsCheckingSizeRegister, m_dataScratchGPR, /* validateAuth */ true, /* mayBeNull */ false);
+        m_jit.cageConditionallyAndUntag(Gigacage::Primitive, GPRInfo::wasmBaseMemoryPointer, GPRInfo::wasmBoundsCheckingSizeRegister, m_scratchGPR, /* validateAuth */ true, /* mayBeNull */ false);
         isSameInstance.link(&m_jit);
 
         // Since this can switch instance, we need to keep JSWebAssemblyInstance anchored in the stack.
@@ -6895,12 +6889,8 @@
 #else
             m_jit.compareIntegerVector(RelationalCondition::Equal, SIMDInfo { SIMDLane::i32x4, SIMDSignMode::Unsigned }, result.asFPR(), result.asFPR(), result.asFPR());
 #endif
-        else {
-            m_jit.move(TrustedImm64(value.u64x2[0]), m_dataScratchGPR);
-            m_jit.vectorReplaceLaneInt64(TrustedImm32(0), m_dataScratchGPR, result.asFPR());
-            m_jit.move(TrustedImm64(value.u64x2[1]), m_dataScratchGPR);
-            m_jit.vectorReplaceLaneInt64(TrustedImm32(1), m_dataScratchGPR, result.asFPR());
-        }
+        else
+            m_jit.materializeVector(value, result.asFPR());
     }
 
     ExpressionType WARN_UNUSED_RETURN addConstant(v128_t value)
@@ -7094,12 +7084,10 @@
             }
             if (scalarTypeIsFloatingPoint(info.lane)) {
                 if (info.lane == SIMDLane::f32x4) {
-                    m_jit.move(TrustedImm32(0x7fffffff), m_scratchGPR);
-                    m_jit.move32ToFloat(m_scratchGPR, m_scratchFPR);
+                    m_jit.move32ToFloat(TrustedImm32(0x7fffffff), m_scratchFPR);
                     m_jit.vectorSplatFloat32(m_scratchFPR, m_scratchFPR);
                 } else {
-                    m_jit.move(TrustedImm64(0x7fffffffffffffffll), m_scratchGPR);
-                    m_jit.move64ToDouble(m_scratchGPR, m_scratchFPR);
+                    m_jit.move64ToDouble(TrustedImm64(0x7fffffffffffffffll), m_scratchFPR);
                     m_jit.vectorSplatFloat64(m_scratchFPR, m_scratchFPR);
                 }
                 m_jit.vectorAnd(SIMDInfo { SIMDLane::v128, SIMDSignMode::None }, valueLocation.asFPR(), m_scratchFPR, resultLocation.asFPR());
@@ -7236,14 +7224,12 @@
                 break;
             case SIMDLane::f32x4:
                 // For floats, we unfortunately have to flip the sign bit using XOR.
-                m_jit.move(TrustedImm32(-0x80000000), m_scratchGPR);
-                m_jit.move32ToFloat(m_scratchGPR, m_scratchFPR);
+                m_jit.move32ToFloat(TrustedImm32(-0x80000000), m_scratchFPR);
                 m_jit.vectorSplatFloat32(m_scratchFPR, m_scratchFPR);
                 m_jit.vectorXor(SIMDInfo { SIMDLane::v128, SIMDSignMode::None }, valueLocation.asFPR(), m_scratchFPR, resultLocation.asFPR());
                 break;
             case SIMDLane::f64x2:
-                m_jit.move(TrustedImm64(-0x8000000000000000ll), m_scratchGPR);
-                m_jit.move64ToDouble(m_scratchGPR, m_scratchFPR);
+                m_jit.move64ToDouble(TrustedImm64(-0x8000000000000000ll), m_scratchFPR);
                 m_jit.vectorSplatFloat64(m_scratchFPR, m_scratchFPR);
                 m_jit.vectorXor(SIMDInfo { SIMDLane::v128, SIMDSignMode::None }, valueLocation.asFPR(), m_scratchFPR, resultLocation.asFPR());
                 break;
@@ -7364,13 +7350,15 @@
         if (info.lane == SIMDLane::i64x2) {
             // Multiplication of 64-bit ints isn't natively supported on ARM or Intel (at least the ones we're targeting)
             // so we scalarize it instead.
+            ScratchScope<1, 0> scratches(*this);
+            GPRReg dataScratchGPR = scratches.gpr(0);
             m_jit.vectorExtractLaneInt64(TrustedImm32(0), left.asFPR(), m_scratchGPR);
-            m_jit.vectorExtractLaneInt64(TrustedImm32(0), right.asFPR(), m_dataScratchGPR);
-            m_jit.mul64(m_scratchGPR, m_dataScratchGPR, m_scratchGPR);
+            m_jit.vectorExtractLaneInt64(TrustedImm32(0), right.asFPR(), dataScratchGPR);
+            m_jit.mul64(m_scratchGPR, dataScratchGPR, m_scratchGPR);
             m_jit.vectorReplaceLane(SIMDLane::i64x2, TrustedImm32(0), m_scratchGPR, m_scratchFPR);
             m_jit.vectorExtractLaneInt64(TrustedImm32(1), left.asFPR(), m_scratchGPR);
-            m_jit.vectorExtractLaneInt64(TrustedImm32(1), right.asFPR(), m_dataScratchGPR);
-            m_jit.mul64(m_scratchGPR, m_dataScratchGPR, m_scratchGPR);
+            m_jit.vectorExtractLaneInt64(TrustedImm32(1), right.asFPR(), dataScratchGPR);
+            m_jit.mul64(m_scratchGPR, dataScratchGPR, m_scratchGPR);
             m_jit.vectorReplaceLane(SIMDLane::i64x2, TrustedImm32(1), m_scratchGPR, m_scratchFPR);
             m_jit.moveVector(m_scratchFPR, result.asFPR());
         } else
@@ -7549,9 +7537,7 @@
 private:
     bool isScratch(Location loc)
     {
-        return (loc.isGPR() && loc.asGPR() == m_dataScratchGPR)
-            || (loc.isGPR() && loc.asGPR() == m_scratchGPR)
-            || (loc.isFPR() && loc.asFPR() == m_scratchFPR);
+        return (loc.isGPR() && loc.asGPR() == m_scratchGPR) || (loc.isFPR() && loc.asFPR() == m_scratchFPR);
     }
 
     void emitStoreConst(Value constant, Location loc)
@@ -7613,12 +7599,10 @@
             m_jit.move(TrustedImm64(constant.asRef()), loc.asGPR());
             break;
         case TypeKind::F32:
-            m_jit.move(Imm32(constant.asI32()), m_dataScratchGPR);
-            m_jit.move32ToFloat(m_dataScratchGPR, loc.asFPR());
+            m_jit.moveFloat(Imm32(constant.asI32()), loc.asFPR());
             break;
         case TypeKind::F64:
-            m_jit.move(Imm64(constant.asI64()), m_dataScratchGPR);
-            m_jit.move64ToDouble(m_dataScratchGPR, loc.asFPR());
+            m_jit.moveDouble(Imm64(constant.asI64()), loc.asFPR());
             break;
         default:
             RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Unimplemented constant typekind.");
@@ -7680,12 +7664,10 @@
         case TypeKind::I32:
         case TypeKind::I31ref:
         case TypeKind::F32:
-            m_jit.load32(srcLocation.asAddress(), m_dataScratchGPR);
-            m_jit.store32(m_dataScratchGPR, dst.asAddress());
+            m_jit.transfer32(srcLocation.asAddress(), dst.asAddress());
             break;
         case TypeKind::I64:
-            m_jit.load64(srcLocation.asAddress(), m_dataScratchGPR);
-            m_jit.store64(m_dataScratchGPR, dst.asAddress());
+            m_jit.transfer64(srcLocation.asAddress(), dst.asAddress());
             break;
         case TypeKind::F64:
             m_jit.loadDouble(srcLocation.asAddress(), m_scratchFPR);
@@ -8395,7 +8377,6 @@
 
     RegisterID m_scratchGPR { GPRInfo::nonPreservedNonArgumentGPR0 }; // Scratch registers to hold temporaries in operations.
     FPRegisterID m_scratchFPR { FPRInfo::nonPreservedNonArgumentFPR0 };
-    RegisterID m_dataScratchGPR { GPRInfo::wasmScratchGPR0 }; // Used specifically as a temporary for complex moves.
 
 #if CPU(X86) || CPU(X86_64)
     RegisterID m_shiftRCX { X86Registers::ecx };
